iT邦幫忙

2025 iThome 鐵人賽

DAY 6
0
自我挑戰組

從0開始學習Java系列 第 6 篇

使用 Java HttpURLConnection 下載網頁內容

  • 分享至 

  • xImage
  •  
package ch14;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.zip.GZIPInputStream;

/**
 * Downloads a single web page with {@link HttpURLConnection} and prints its
 * body to standard output line by line.
 *
 * <p>The request advertises gzip support and a browser-like User-Agent. The
 * response body is gunzipped when the server says it is gzip-encoded and is
 * decoded with the charset named in the {@code Content-Type} header (UTF-8
 * when none is given or the name is not usable).
 */
public class Test4 {
	/**
	 * Fetches the hard-coded article URL and prints the response body.
	 *
	 * @param args unused
	 * @throws Exception if the URL is malformed or any network/IO step fails
	 */
	public static void main(String[] args) throws Exception {
		URL url = new URI("https://ithelp.ithome.com.tw/articles/10374533").toURL();
		HttpURLConnection conn = (HttpURLConnection) url.openConnection();

		// Everything after openConnection() sits inside this try so that
		// disconnect() also runs when connecting, reading the status line or
		// setting up decompression fails.
		try {
			conn.setRequestMethod("GET");
			conn.setInstanceFollowRedirects(true);

			conn.setConnectTimeout(10000);
			conn.setReadTimeout(10000);

			// Key point: present ourselves as a browser and ask for gzip.
			conn.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
					+ "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
			conn.setRequestProperty("Accept-Encoding", "gzip");

			int status = conn.getResponseCode();

			// For 2xx/3xx read the normal body; otherwise prefer the error body.
			// When there is no error body, getInputStream() is still called so
			// that its exception reports the failed request.
			InputStream raw;
			if (status >= 200 && status < 400) {
				raw = conn.getInputStream();
			} else {
				InputStream errorBody = conn.getErrorStream();
				raw = (errorBody != null) ? errorBody : conn.getInputStream();
			}

			// Decompress only when the server says the body is gzip-encoded.
			String contentEncoding = conn.getHeaderField("Content-Encoding");
			boolean gzipped = contentEncoding != null
					&& contentEncoding.toLowerCase(Locale.ROOT).contains("gzip");

			// Take the charset from Content-Type (e.g. text/html; charset=UTF-8),
			// defaulting to UTF-8.
			Charset cs = parseCharset(conn.getContentType(), StandardCharsets.UTF_8);

			// The raw stream is its own resource so it is closed even if the
			// GZIPInputStream constructor throws on a bad header.
			try (InputStream body = raw;
					BufferedReader br = new BufferedReader(
							new InputStreamReader(gzipped ? new GZIPInputStream(body) : body, cs))) {
				String line;
				while ((line = br.readLine()) != null) {
					System.out.println(line);
				}
			}
		} finally {
			conn.disconnect();
		}
	}

	/**
	 * Extracts the charset from a {@code Content-Type} header value.
	 *
	 * <p>The value is split on {@code ;} and the first parameter whose name is
	 * {@code charset} (case-insensitive, surrounding whitespace ignored) and
	 * whose value names a charset known to this JVM wins. Double quotes around
	 * the value are removed.
	 *
	 * @param contentType the header value, may be {@code null}
	 * @param fallback    charset to return when none can be determined
	 * @return the parsed charset, or {@code fallback}
	 */
	private static Charset parseCharset(String contentType, Charset fallback) {
		if (contentType == null) {
			return fallback;
		}
		for (String part : contentType.split(";")) {
			int eq = part.indexOf('=');
			if (eq < 0) {
				continue;
			}
			String key = part.substring(0, eq).trim();
			if (!key.equalsIgnoreCase("charset")) {
				continue;
			}
			String name = part.substring(eq + 1).trim().replace("\"", "").trim();
			try {
				return Charset.forName(name);
			} catch (IllegalArgumentException ignored) {
				// Illegal or unsupported charset name: deliberately ignored so
				// the remaining parameters are checked and the fallback applies.
			}
		}
		return fallback;
	}
}

參考資料
https://www.tutorialspoint.com/java/httpurlconnection_setinstancefollowredirects.htm


上一篇
java.net 範例實作-URL及HttpURLConnection類別
下一篇
java socket 小範例
系列文
從0開始學習Java21
圖片
  熱門推薦
圖片
{{ item.channelVendor }} | {{ item.webinarstarted }} |
{{ formatDate(item.duration) }}
直播中

尚未有邦友留言

立即登入留言